* Set the maximum matrix size to 11000 before any estimation is run.
* NOTE(review): recent Stata releases may ignore this setting -- confirm it is still needed.
set matsize 11000

* ) Insert directory
* Placeholder: put the project folder between the quotes before running.
cd ""
* Load the input data, replacing whatever is currently in memory.
* (The file name suggests one row per author-paper-referee -- confirm.)
use "Author-paper-referee level data", clear

**********************
* ) Still need to collapse to paper-reviewer level; first rank the authors
*   within each paper-referee group so the most prominent one can be picked.
*
* Every criterion is ranked within (ms_number_id, referee_id) so that rank 1
* is the "best" author. egen's rank() orders from smallest to largest and is
* used with its default tie handling, so authors tied for the top spot are
* not coded with a rank of exactly 1.
*   - count variables (more is better): ranked on 1/(count+1)
*   - rank/year variables (lower is better): ranked on the raw value, with
*     missing (.) recoded to 100000 so that those authors come last
*
* The three lists below are parallel: source variable, output rank variable,
* and whether the source is a count that has to be inverted.
local criteria  published_from_5_no_pp IDEASemploymentrank published_papers phdyear IDEASrank
local rankvars  max_T5 max_employmentrank max_publications max_phdyear max_phdrank
local invert    1 0 1 0 0

forvalues k = 1/5 {
	local source : word `k' of `criteria'
	local target : word `k' of `rankvars'
	local flip   : word `k' of `invert'

	* Build the temporary sort key for this criterion
	tempvar sortkey
	if `flip' {
		gen `sortkey' = `source'+1
		replace `sortkey' = 1/`sortkey'
	}
	else {
		gen `sortkey' = `source'
		replace `sortkey' = 100000 if `sortkey'==.
	}

	* Rank within paper-referee group, then show the distribution of ranks
	bysort ms_number_id referee_id: egen `target' = rank(`sortkey')
	drop `sortkey'
	tab `target', m
}

* ) Flag the "famous" author of each paper
* First criterion: the author holding rank 1 on T5 (max_T5==1)
gen famous_author = (max_T5==1)

* Remaining criteria are applied as successive tiebreaks, in this order:
* department (employment) rank, total publications, PhD year, PhD rank.
* A criterion is only consulted for papers in which no author has been
* flagged by an earlier one.
foreach tiebreak of varlist max_employmentrank max_publications max_phdyear max_phdrank {
	tempvar resolved
	bysort ms_number_id: egen `resolved' = max(famous_author)
	replace famous_author = 1 if `resolved'==0 & `tiebreak'==1
	drop `resolved'
}

* Papers that remain unresolved after every tiebreak: how many, and who are they?
bysort ms_number_id: egen dummy = max(famous_author)
count if dummy==0
tab yearsPhD if dummy==0, m
*Over half are people we couldn't find, another 25% are graduate students

* ) In those remaining cases, take the first row of each paper-referee group as the "famous" author
* NOTE(review): which row comes first inside a group is whatever order the sort
* leaves tied observations in, so the pick may differ between runs and between
* referees of the same paper. Listing a unique author identifier in parentheses
* in the bysort would pin it down -- confirm which variable to use.
bysort ms_number_id referee_id: replace famous_author=1 if dummy==0 & _n==1
drop dummy

* ) Keeping only the flagged authors should leave author-reviewer-paper level data
drop if famous_author!=1

***************************************
* ) Author characteristics used as controls (stored in a global so that every
*   specification below can reference the same list)
global authorcontrols female gender_missing published_papers published_from_5_no_pp ///
	USNEWS_1-USNEWS_5 is_nber ///
	employmentrank1 employmentrank2 employmentrank3 employmentrank4 employmentrank5 ///
	NBER_AG-NBER_PE degree_1 degree_2
summarize $authorcontrols

* ) Top-code both distance measures at 6; missing distances are set to 6 as well
foreach dist of varlist referee_distance editor_distance {
	replace `dist' = 6 if `dist'==. | `dist'>6
}

* ) Editor analysis: mark a single row per paper-editor pair (1 on the first row
*   of each pair, missing elsewhere) so that papers sent to several referees
*   are not counted more than once
by ms_number_id editor_id, sort: gen indicate = cond(_n==1, 1, .)

* ) For model with "number of direct connections between author and reviewer/editor"
* The count adds up four direct links: distance of exactly 1, PhD match,
* employment match and NBER program match. It is missing whenever one of the
* match variables is missing.
foreach who in referee editor {
	gen `who'_Nconnections = (`who'_distance==1) + `who'_phdmatch + `who'_employmentmatch + `who'_isnbermatch_program
}

*Generating indicators for each category (missing counted as its own category)
tab referee_Nconnections, generate(referee_Nconnections_raw) m
tab editor_Nconnections, generate(editor_Nconnections_raw) m

*Top code 2+ connections
* _raw3 is rebuilt from the count itself instead of summing _raw4/_raw5 by
* position. Which dummies tab creates depends on the values present in the
* data: a positional sum errors out when the largest count never occurs, and
* it folds the missing-value category into "2+" when it happens to land in
* _raw4/_raw5. With counts 0..3 (referee) and 0..4 (editor) and no missing
* values the result is identical to the positional sum.
foreach who in referee editor {
	replace `who'_Nconnections_raw3 = (`who'_Nconnections>=2 & !missing(`who'_Nconnections))
}

**************
**************
**************
* ) Editors: does a connection to the author predict passing the desk?
* All six models regress passed_the_desk on a set of connection measures plus
* the author controls, use only the rows flagged by indicate==1, absorb
* editor fixed effects and cluster standard errors by manuscript.
eststo clear

local spec1 editor_phdmatch
local spec2 editor_employmentmatch
local spec3 editor_isnbermatch_program
local spec4 editor_distance_raw1-editor_distance_raw3
local spec5 editor_phdmatch editor_employmentmatch editor_isnbermatch_program editor_distance_raw1-editor_distance_raw3
local spec6 editor_Nconnections_raw2-editor_Nconnections_raw3

* 1: PhD institution   2: same employment   3: NBER programs
* 4: degrees of separation   5: everything together   6: number of connections
forvalues j = 1/6 {
	eststo: reghdfe passed_the_desk `spec`j'' $authorcontrols if indicate==1, absorb(editor_id) vce(cluster ms_number_id)
	* Row markers for the table footer
	estadd local refFE "X", replace
	estadd local paperFE " ", replace
	estadd local authorcontrols "X", replace
}

* Row labels for the LaTeX table (the labels carry raw LaTeX markup)
label var editor_phdmatch "\underline{Outcome: Passed the desk} \\ \ \ Exact same PhD institution "
label var editor_employmentmatch "\ \ Former/current colleagues"
label var editor_isnbermatch_program "\ \ Same NBER program(s)"
forvalues d = 1/5 {
	label var editor_distance_raw`d' "\ \ Degrees of separation: `d'"
}
label var editor_distance_raw6 "\ \ Degrees of separation: 6+"
label var editor_Nconnections_raw2 "\hline \# of direct matches: \\ \ \ - One match"
label var editor_Nconnections_raw3 "\ \ - Two+ matches"

* Export the table; the footer shows N, the editor-FE marker and the
* author-controls marker.
* NOTE(review): mtitles() lists 15 column titles although only 6 models are stored.
esttab using "editormatch_famousauthor.tex", ///
	replace se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	noobs nonum nonotes nogaps ///
	s(N refFE authorcontrols, label("Editor-papers" "Editor FE" "Author controls") fmt(%12.0f)) ///
	mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)" "(7)" "(8)" "(9)" "(10)" "(11)" "(12)" "(13)" "(14)" "(15)") ///
	label ///
	keep(editor_phdmatch editor_employmentmatch editor_isnbermatch_program editor_distance* editor_Nconnections_raw2 editor_Nconnections_raw3)

**************
* ) Reviewers: does a connection to the author predict the outcome RR?
* All six models regress RR on a set of connection measures, absorb referee
* and manuscript fixed effects and cluster standard errors by manuscript.
eststo clear

local model1 referee_phdmatch
local model2 referee_employmentmatch
local model3 referee_isnbermatch_program
local model4 referee_distance_raw1-referee_distance_raw3
local model5 referee_phdmatch referee_employmentmatch referee_isnbermatch_program referee_distance_raw1-referee_distance_raw3
local model6 referee_Nconnections_raw2-referee_Nconnections_raw3

* 1: PhD institution   2: same employment   3: NBER programs
* 4: degrees of separation   5: everything together   6: number of connections
forvalues j = 1/6 {
	eststo: reghdfe RR `model`j'', absorb(referee_id ms_number_id) vce(cluster ms_number_id)
	* Row markers for the table footer.
	* NOTE(review): authorcontrols is tagged "X" although no author controls
	* enter these specifications; the tag is not printed by the esttab call below.
	foreach marker in refFE paperFE authorcontrols {
		estadd local `marker' "X", replace
	}
}

* Row labels for the LaTeX table (the labels carry raw LaTeX markup)
label var referee_phdmatch "\underline{Outcome: Positive evaluation} \\ \ \ Exact same PhD institution "
label var referee_employmentmatch "\ \ Former/current colleagues"
label var referee_isnbermatch_program "\ \ Same NBER program(s)"
forvalues d = 1/5 {
	label var referee_distance_raw`d' "\ \ Degrees of separation: `d'"
}
label var referee_distance_raw6 "\ \ Degrees of separation: 6+"
label var referee_Nconnections_raw2 "\hline \# of direct matches: \\ \ \ - One match"
label var referee_Nconnections_raw3 "\ \ - Two+ matches"

* Export the table; the footer shows N, the reviewer-FE marker and the
* paper-FE marker.
* NOTE(review): mtitles() lists 15 column titles although only 6 models are stored.
esttab using "refereematch_famousauthor.tex", ///
	replace se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	noobs nonum nonotes nogaps ///
	s(N refFE paperFE, label("Reviewer-papers" "Reviewer FE" "Paper FE") fmt(%12.0f)) ///
	mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)" "(7)" "(8)" "(9)" "(10)" "(11)" "(12)" "(13)" "(14)" "(15)") ///
	label ///
	keep(referee_phdmatch referee_employmentmatch referee_isnbermatch_program referee_distance* referee_Nconnections_raw2 referee_Nconnections_raw3)
